import PyPDF2

def analysis_pdf(path, keyword):
    """Print every page of a PDF on which each given keyword occurs.

    Args:
        path: Filesystem path of the PDF file to scan.
        keyword: One or more search terms separated by whitespace. Each term
            is matched independently as a case-sensitive substring of the
            text extracted from a page.

    Returns:
        None. One line is printed per (keyword, page) hit, grouped by keyword
        in the order given and then by ascending page index. The printed page
        index is zero-based, i.e. the first page is reported as 0.

    Raises:
        OSError: If ``path`` cannot be opened for reading.
        Errors raised by PyPDF2 while parsing the file or extracting text
        are not caught here and propagate to the caller.
    """
    # Whitespace-separated terms; an empty or blank string yields no terms.
    keyword_list = keyword.split()

    # PDFs must be read in binary mode. The context manager guarantees the
    # handle is closed even if parsing or text extraction raises.
    with open(path, mode='rb') as pdf_file:
        # NOTE(review): PdfFileReader / numPages / getPage / extractText are
        # the legacy camelCase PyPDF2 names; newer PyPDF2 releases appear to
        # deprecate them -- confirm against the pinned PyPDF2 version.
        pdf_document = PyPDF2.PdfFileReader(pdf_file)

        # Nothing to search for: skip the (potentially slow) text extraction.
        if not keyword_list:
            return

        # Extract each page's text exactly once, while the file is still
        # open, instead of re-extracting every page for every keyword.
        page_texts = [
            pdf_document.getPage(i).extractText()
            for i in range(pdf_document.numPages)
        ]

    # Keyword-major order keeps the output grouped by keyword.
    for k in keyword_list:
        for i, text in enumerate(page_texts):
            if k in text:
                # Message reads "page <i> contains keyword <k>".
                print('第%s页有关键词' % i + ' ' + k)

if __name__ == '__main__':
    # Sample invocation: scan the bundled paper for each search term.
    # Terms are given as a single string, separated by spaces.
    search_terms = 'captured window'
    pdf_path = r"Emerging Topic Detection on the Meta-data of Images from Fashion Social Media..pdf"
    analysis_pdf(pdf_path, search_terms)
